# import relevant packages
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
from matplotlib.patches import Patch
import matplotlib.lines as mlines
sys.path.insert(1,'/REDACTED/fairness/code/config')
from configureColors import *
sys.path.insert(1,'/REDACTED/fairness/code/utilities')
import weightedBinscatter as wb
import estimators as et
#sys.path.append('packages')
#import binscatter
import matplotlib.font_manager as fm
import matplotlib
import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.iolib.smpickle import load_pickle
import joblib
from trajectoryPlotters import *

# set plot defaults
plt.style.use('/REDACTED/fairness/code/config/fairness.mplstyle')
fe = fm.FontEntry(
    fname='/REDACTED/fairness/code/config/fonts/cmunrm.ttf',
    name='latex')
fm.fontManager.ttflist.insert(0, fe)
matplotlib.rcParams['font.family'] = fe.name

colorsBNB = cdictBlackNonBlack()
colorsENE = cdictEITCNon()
cb = colorsBNB['Black']['color']
ab = colorsBNB['Black']['alpha']
cnb = colorsBNB['non-Black']['color']
anb = colorsBNB['non-Black']['alpha']
ce = colorsENE['eitc']['color']
cne = colorsENE['non']['color']
ae = colorsENE['eitc']['alpha']
ane = colorsENE['non']['alpha']

## Linear Estimator
def regEstimate(dataset, pbvarb = 'predicted_prob_black', outcome = 'audited'):
	model = sm.OLS.from_formula(outcome+' ~ '+pbvarb, data = dataset).fit(cov_type = 'HC1')
	coef = model.params[pbvarb]
	se = model.bse[pbvarb]
	black_aud_rate = model.params[0] + model.params[1]
	non_black_aud_rate = model.params[0] 
	return coef, se, black_aud_rate, non_black_aud_rate

## Probabilistic Estimator 
def chenEstimate(dataset, pbvarb = 'predicted_prob_black', outcome = 'audited'):
	black_aud_rate = (dataset[pbvarb]*dataset[outcome]).sum()/(dataset[pbvarb]).sum()
	non_black_aud_rate = ((1-dataset[pbvarb])*dataset[outcome]).sum()/(1-dataset[pbvarb]).sum()
	est =  black_aud_rate - non_black_aud_rate
	return est, black_aud_rate, non_black_aud_rate

## Get standard errors
def getSEs(dataset,pbvarb='predicted_prob_black',outcome='audited', seReg = 0.0):
	seMultiplier=getSEMultiplier(dataset,pbvarb)
	#seReg = regEstimate(dataset,pbvarb,outcome)[1]
	seChen = seReg*seMultiplier
	return seReg, seChen

def getSEMultiplier(dataset,pbvarb='predicted_prob_black'):
	return np.sqrt(dataset[pbvarb].var()/(dataset[pbvarb].mean()*(1-dataset[pbvarb].mean())))

### OUTPUT

out = '/REDACTED/'

### OP AUDIT
cols=['predicted_prob_black', 'aud_no_research_audits', 'isEIC', 'taxpayer_id_new']

dataBISG = pd.read_csv("/REDACTED/data/final/individualBISG2014_full_final.csv", usecols=cols)
len(dataBISG)
dataBISG= dataBISG[~dataBISG.predicted_prob_black.isnull()]
len(dataBISG)
dataBISG.taxpayer_id_new.value_counts()
dataBISG.taxpayer_id_new.isnull().sum()

dataBISG = dataBISG.drop(columns=['taxpayer_id_new'])

dataBISG.head()



#########################
#### Fig 3 (right): Estimated Audit Rate by Race
#########################

# calculate outcomes of interest 100 times, bootstrapping our overall population on each interation
from timeit import default_timer as timer
completely_full_df = pd.DataFrame(columns = ['Estimate', 'SE', 'Sample', 'Black', 'Linear', 'Iter'])

n_iter_bootstrap = 100
for i in range(n_iter_bootstrap):
    start = timer()
    print("Bootstrap iteration: "+str(i))
    # bootstrap overall dataset
    dataBISG_b = dataBISG.sample(frac = 1, random_state = i, replace = True)
    # calculate outcomes of interest
    coef, se, lin_b_ar, lin_nb_ar = regEstimate(dataBISG_b, pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
    est, prob_b_ar, prob_nb_ar = chenEstimate(dataBISG_b, pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
    se_lin, se_prob = getSEs(dataBISG_b,pbvarb='predicted_prob_black',outcome='aud_no_research_audits', seReg = se)
    coef_eic, se_eic, lin_b_ar_eic, lin_nb_ar_eic = regEstimate(dataBISG_b[dataBISG_b['isEIC']==1], pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
    est_eic, prob_b_ar_eic, prob_nb_ar_eic = chenEstimate(dataBISG_b[dataBISG_b['isEIC']==1], pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
    se_lin_eic, se_prob_eic = getSEs(dataBISG_b[dataBISG_b['isEIC']==1],pbvarb='predicted_prob_black',outcome='aud_no_research_audits', seReg = se_eic)
    coef_noneic, se_noneic, lin_b_ar_noneic, lin_nb_ar_noneic = regEstimate(dataBISG_b[dataBISG_b['isEIC']==0], pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
    est_noneic, prob_b_ar_noneic, prob_nb_ar_noneic = chenEstimate(dataBISG_b[dataBISG_b['isEIC']==0], pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
    se_lin_noneic, se_prob_noneic = getSEs(dataBISG_b[dataBISG_b['isEIC']==0],pbvarb='predicted_prob_black',outcome='aud_no_research_audits', seReg = se_noneic)
    # compile outcomes, turn into df
    data_disp = [
    [prob_nb_ar,se_prob,'f','Non-Black','p'],
    [lin_nb_ar, se_lin,'f','Non-Black','l'], 
    [prob_b_ar,se_prob,'f','Black','p'], 
    [lin_b_ar, se_lin,'f','Black','l'],
    [prob_nb_ar_eic,se_prob_eic,'e','Non-Black','p'], 
    [lin_nb_ar_eic,se_lin_eic,'e','Non-Black','l'], 
    [prob_b_ar_eic,se_prob_eic,'e','Black','p'], 
    [lin_b_ar_eic,se_lin_eic,'e','Black','l'],
    [prob_nb_ar_noneic,se_prob_noneic,'n','Non-Black','p'], 
    [lin_nb_ar_noneic,se_lin_noneic,'n','Non-Black','l'], 
    [prob_b_ar_noneic,se_prob_noneic,'n','Black','p'], 
    [lin_b_ar_noneic,se_lin_noneic,'n','Black','l']
    ]
    df = pd.DataFrame(data_disp, columns = ['Estimate', 'SE', 'Sample', 'Black', 'Linear'])
    df.Estimate = df.Estimate * 100
    df.SE = df.SE * 100
    #full_df = df.loc[df['Sample']=='f']
    full_df = df
    full_df['Iter'] = str(i)
    completely_full_df = pd.concat([completely_full_df, full_df])
    if i in [10,50,75]:
        completely_full_df.to_csv('/REDACTED/fig_3_bootstrap_progress.csv', index=False)
    end = timer()
    print("Time: "+str(end-start))

completely_full_df.to_csv('/REDACTED/fig_3_bootstrap.csv', index=False)
~~~~~~~~
completely_full_df = pd.read_csv('/REDACTED/fig_3_bootstrap.csv')

# calculate point estimates, which will be the height of the bars
coef, se, lin_b_ar, lin_nb_ar = regEstimate(dataBISG, pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
est, prob_b_ar, prob_nb_ar = chenEstimate(dataBISG, pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
se_lin, se_prob = getSEs(dataBISG,pbvarb='predicted_prob_black',outcome='aud_no_research_audits', seReg = se)

coef_eic, se_eic, lin_b_ar_eic, lin_nb_ar_eic = regEstimate(dataBISG[dataBISG['isEIC']==1], pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
est_eic, prob_b_ar_eic, prob_nb_ar_eic = chenEstimate(dataBISG[dataBISG['isEIC']==1], pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
se_lin_eic, se_prob_eic = getSEs(dataBISG[dataBISG['isEIC']==1],pbvarb='predicted_prob_black',outcome='aud_no_research_audits', seReg = se_eic)


coef_noneic, se_noneic, lin_b_ar_noneic, lin_nb_ar_noneic = regEstimate(dataBISG[dataBISG['isEIC']==0], pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
est_noneic, prob_b_ar_noneic, prob_nb_ar_noneic = chenEstimate(dataBISG[dataBISG['isEIC']==0], pbvarb = 'predicted_prob_black', outcome = 'aud_no_research_audits')
se_lin_noneic, se_prob_noneic = getSEs(dataBISG[dataBISG['isEIC']==0],pbvarb='predicted_prob_black',outcome='aud_no_research_audits', seReg = se_noneic)

# compile un-bootstrapped results
data_disp = [
[prob_nb_ar,se_prob,'f','Non-Black','p'],
[lin_nb_ar, se_lin,'f','Non-Black','l'], 
[prob_b_ar,se_prob,'f','Black','p'], 
[lin_b_ar, se_lin,'f','Black','l'],
[prob_nb_ar_eic,se_prob_eic,'e','Non-Black','p'], 
[lin_nb_ar_eic,se_lin_eic,'e','Non-Black','l'], 
[prob_b_ar_eic,se_prob_eic,'e','Black','p'], 
[lin_b_ar_eic,se_lin_eic,'e','Black','l'],
[prob_nb_ar_noneic,se_prob_noneic,'n','Non-Black','p'], 
[lin_nb_ar_noneic,se_lin_noneic,'n','Non-Black','l'], 
[prob_b_ar_noneic,se_prob_noneic,'n','Black','p'], 
[lin_b_ar_noneic,se_lin_noneic,'n','Black','l']
]



def labelbar(ax,rects,nsig=2,fontsize=20,additiveeps=0):
    for rect in rects:
        height = rect.get_height()
        sigfigstr = '%.'+str(nsig)+'f'
        print(sigfigstr)
        ax.text(rect.get_x() + rect.get_width()/2,1.005*height+additiveeps, sigfigstr % (height),ha='center',va='bottom',fontsize=fontsize)


df = pd.DataFrame(data_disp, columns = ['Estimate', 'SE', 'Sample', 'Black', 'Linear'])
df.Estimate = df.Estimate * 100
df.SE = df.SE * 100

full_df = df.loc[df['Sample']=='f']

# calculate upper and lower bounds on 95% confidence interval
f_b_p = completely_full_df[(completely_full_df.Sample == 'f') & (completely_full_df.Black == 'Black') & (completely_full_df.Linear == 'p')]
lb_black_prob = (f_b_p.Estimate.sort_values().tolist()[2] + f_b_p.Estimate.sort_values().tolist()[3])/2
ub_black_prob = (f_b_p.Estimate.sort_values().tolist()[96] + f_b_p.Estimate.sort_values().tolist()[97])/2

f_nb_p = completely_full_df[(completely_full_df.Sample == 'f') & (completely_full_df.Black == 'Non-Black') & (completely_full_df.Linear == 'p')]
lb_non_black_prob = (f_nb_p.Estimate.sort_values().tolist()[2] + f_nb_p.Estimate.sort_values().tolist()[3])/2
ub_non_black_prob = (f_nb_p.Estimate.sort_values().tolist()[96] + f_nb_p.Estimate.sort_values().tolist()[97])/2

f_b_l = completely_full_df[(completely_full_df.Sample == 'f') & (completely_full_df.Black == 'Black') & (completely_full_df.Linear == 'l')]
lb_black_lin = (f_b_l.Estimate.sort_values().tolist()[2] + f_b_l.Estimate.sort_values().tolist()[3])/2
ub_black_lin = (f_b_l.Estimate.sort_values().tolist()[96] + f_b_l.Estimate.sort_values().tolist()[97])/2

f_nb_l = completely_full_df[(completely_full_df.Sample == 'f') & (completely_full_df.Black == 'Non-Black') & (completely_full_df.Linear == 'l')]
lb_non_black_lin = (f_nb_l.Estimate.sort_values().tolist()[2] + f_nb_l.Estimate.sort_values().tolist()[3])/2
ub_non_black_lin = (f_nb_l.Estimate.sort_values().tolist()[96] + f_nb_l.Estimate.sort_values().tolist()[97])/2

# add these results to our original df
yerr_neg_black_prob = full_df.Estimate.loc[(full_df['Linear']=='p') & (full_df['Black']=='Black')].iloc[0] - lb_black_prob
yerr_pos_black_prob = ub_black_prob - full_df.Estimate.loc[(full_df['Linear']=='p') & (full_df['Black']=='Black')].iloc[0]

yerr_neg_non_black_prob = full_df.Estimate.loc[(full_df['Linear']=='p') & (full_df['Black']=='Non-Black')].iloc[0] - lb_non_black_prob
yerr_pos_non_black_prob = ub_non_black_prob - full_df.Estimate.loc[(full_df['Linear']=='p') & (full_df['Black']=='Non-Black')].iloc[0]

yerr_neg_black_lin = full_df.Estimate.loc[(full_df['Linear']=='l') & (full_df['Black']=='Black')].iloc[0] - lb_black_lin
yerr_pos_black_lin = ub_black_lin - full_df.Estimate.loc[(full_df['Linear']=='l') & (full_df['Black']=='Black')].iloc[0]

yerr_neg_non_black_lin = full_df.Estimate.loc[(full_df['Linear']=='l') & (full_df['Black']=='Non-Black')].iloc[0] - lb_non_black_lin
yerr_pos_non_black_lin = ub_non_black_lin - full_df.Estimate.loc[(full_df['Linear']=='l') & (full_df['Black']=='Non-Black')].iloc[0]

full_df['yerr_neg'] = [yerr_neg_non_black_prob, yerr_neg_non_black_lin, yerr_neg_black_prob, yerr_neg_black_lin]
full_df['yerr_pos'] = [yerr_pos_non_black_prob, yerr_pos_non_black_lin, yerr_pos_black_prob, yerr_pos_black_lin]


# plot bars with new bootstrapped standared errors
idx = np.arange(len(full_df.Linear.unique()))
bwidth = 0.2
legend_elements = [Patch(facecolor = "white", edgecolor='k', label = 'Probabilistic'), Patch(facecolor = "white", edgecolor='k', hatch = "//", label = 'Linear')]
ax = plt.subplot(111)
arpnb = ax.bar(idx[0]-bwidth, full_df.Estimate.loc[(full_df['Linear']=='p') & (full_df['Black']=='Non-Black')], yerr = (full_df.yerr_neg.loc[(full_df['Linear']=='p') & (full_df['Black']=='Non-Black')], full_df.yerr_pos.loc[(full_df['Linear']=='p') & (full_df['Black']=='Non-Black')]) ,align = 'center', width = (bwidth-0.03), color = 'purple', alpha = 0.4, label = 'Probabilistic Non-Black', capsize = 15)
arlnb = ax.bar(idx[0],full_df.Estimate.loc[(full_df['Linear']=='l') & (full_df['Black']=='Non-Black')] , yerr = (full_df.yerr_neg.loc[(full_df['Linear']=='l') & (full_df['Black']=='Non-Black')], full_df.yerr_pos.loc[(full_df['Linear']=='l') & (full_df['Black']=='Non-Black')]) ,align = 'center', width = (bwidth-0.03), color = 'purple', alpha = 0.4, label = 'Linear Non-Black', capsize = 15, hatch = "//")
arpb = ax.bar((idx[1]/2)-bwidth, full_df.Estimate.loc[(full_df['Linear']=='p') & (full_df['Black']=='Black')], yerr = (full_df.yerr_neg.loc[(full_df['Linear']=='p') & (full_df['Black']=='Black')], full_df.yerr_pos.loc[(full_df['Linear']=='p') & (full_df['Black']=='Black')]) ,align = 'center', width = (bwidth-0.03), color = 'purple', alpha = 1, label = 'Probabilistic Black', capsize = 15)
arlb = ax.bar((idx[1]/2),full_df.Estimate.loc[(full_df['Linear']=='l') & (full_df['Black']=='Black')] , yerr = (full_df.yerr_neg.loc[(full_df['Linear']=='l') & (full_df['Black']=='Black')], full_df.yerr_pos.loc[(full_df['Linear']=='l') & (full_df['Black']=='Black')])  ,align = 'center', width = (bwidth-0.03), color = 'purple', alpha = 1, label = 'Linear Black', capsize = 15, hatch = "//", edgecolor = 'white')
ax.set_xticks((idx/2) - bwidth/2)
ax.set_xticklabels(full_df.Black.unique())
ax.set_xlabel('')
ax.set_ylabel('Audit Rate (%)')
labelbar(ax, arpb)
labelbar(ax, arlb)
labelbar(ax, arpnb)
labelbar(ax, arlnb)
ax.legend(handles = legend_elements)
plt.savefig('/REDACTED/fullpop_audit_race_estimators_bootstrap_V2.png')

plt.close()

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#########################
#### Fig 5 (right): Estimated Audit Rate by Race
#########################

samp_df = df.loc[df['Sample']!='f']

### calculate upper and lower bounds for 95% confidence intervals

e_nb_p = completely_full_df[(completely_full_df.Sample == 'e') & (completely_full_df.Black == 'Non-Black') & (completely_full_df.Linear == 'p')]
lb_non_black_eprob = (e_nb_p.Estimate.sort_values().tolist()[2] + e_nb_p.Estimate.sort_values().tolist()[3])/2
ub_non_black_eprob = (e_nb_p.Estimate.sort_values().tolist()[96] + e_nb_p.Estimate.sort_values().tolist()[97])/2

e_nb_l = completely_full_df[(completely_full_df.Sample == 'e') & (completely_full_df.Black == 'Non-Black') & (completely_full_df.Linear == 'l')]
lb_non_black_elin = (e_nb_l.Estimate.sort_values().tolist()[2] + e_nb_l.Estimate.sort_values().tolist()[3])/2
ub_non_black_elin = (e_nb_l.Estimate.sort_values().tolist()[96] + e_nb_l.Estimate.sort_values().tolist()[97])/2

e_b_p = completely_full_df[(completely_full_df.Sample == 'e') & (completely_full_df.Black == 'Black') & (completely_full_df.Linear == 'p')]
lb_black_eprob = (e_b_p.Estimate.sort_values().tolist()[2] + e_b_p.Estimate.sort_values().tolist()[3])/2
ub_black_eprob = (e_b_p.Estimate.sort_values().tolist()[96] + e_b_p.Estimate.sort_values().tolist()[97])/2

e_b_l = completely_full_df[(completely_full_df.Sample == 'e') & (completely_full_df.Black == 'Black') & (completely_full_df.Linear == 'l')]
lb_black_elin = (e_b_l.Estimate.sort_values().tolist()[2] + e_b_l.Estimate.sort_values().tolist()[3])/2
ub_black_elin = (e_b_l.Estimate.sort_values().tolist()[96] + e_b_l.Estimate.sort_values().tolist()[97])/2

n_nb_p = completely_full_df[(completely_full_df.Sample == 'n') & (completely_full_df.Black == 'Non-Black') & (completely_full_df.Linear == 'p')]
lb_non_black_nprob = (n_nb_p.Estimate.sort_values().tolist()[2] + n_nb_p.Estimate.sort_values().tolist()[3])/2
ub_non_black_nprob = (n_nb_p.Estimate.sort_values().tolist()[96] + n_nb_p.Estimate.sort_values().tolist()[97])/2

n_nb_l = completely_full_df[(completely_full_df.Sample == 'n') & (completely_full_df.Black == 'Non-Black') & (completely_full_df.Linear == 'l')]
lb_non_black_nlin = (n_nb_l.Estimate.sort_values().tolist()[2] + n_nb_l.Estimate.sort_values().tolist()[3])/2
ub_non_black_nlin = (n_nb_l.Estimate.sort_values().tolist()[96] + n_nb_l.Estimate.sort_values().tolist()[97])/2

n_b_p = completely_full_df[(completely_full_df.Sample == 'n') & (completely_full_df.Black == 'Black') & (completely_full_df.Linear == 'p')]
lb_black_nprob = (n_b_p.Estimate.sort_values().tolist()[2] + n_b_p.Estimate.sort_values().tolist()[3])/2
ub_black_nprob = (n_b_p.Estimate.sort_values().tolist()[96] + n_b_p.Estimate.sort_values().tolist()[97])/2

n_b_l = completely_full_df[(completely_full_df.Sample == 'n') & (completely_full_df.Black == 'Black') & (completely_full_df.Linear == 'l')]
lb_black_nlin = (n_b_l.Estimate.sort_values().tolist()[2] + n_b_l.Estimate.sort_values().tolist()[3])/2
ub_black_nlin = (n_b_l.Estimate.sort_values().tolist()[96] + n_b_l.Estimate.sort_values().tolist()[97])/2

# add these results to our original df
yerr_neg_non_black_eprob = samp_df.Estimate.loc[(samp_df['Sample']=='e') & (samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black')].iloc[0] - lb_non_black_eprob
yerr_pos_non_black_eprob = ub_non_black_eprob - samp_df.Estimate.loc[(samp_df['Sample']=='e') & (samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black')].iloc[0]

yerr_neg_non_black_elin = samp_df.Estimate.loc[(samp_df['Sample']=='e') & (samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black')].iloc[0] - lb_non_black_elin
yerr_pos_non_black_elin = ub_non_black_elin - samp_df.Estimate.loc[(samp_df['Sample']=='e') & (samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black')].iloc[0]

yerr_neg_black_eprob = samp_df.Estimate.loc[(samp_df['Sample']=='e') & (samp_df['Linear']=='p') & (samp_df['Black']=='Black')].iloc[0] - lb_black_eprob
yerr_pos_black_eprob = ub_black_eprob - samp_df.Estimate.loc[(samp_df['Sample']=='e') & (samp_df['Linear']=='p') & (samp_df['Black']=='Black')].iloc[0]

yerr_neg_black_elin = samp_df.Estimate.loc[(samp_df['Sample']=='e') & (samp_df['Linear']=='l') & (samp_df['Black']=='Black')].iloc[0] - lb_black_elin
yerr_pos_black_elin = ub_black_elin - samp_df.Estimate.loc[(samp_df['Sample']=='e') & (samp_df['Linear']=='l') & (samp_df['Black']=='Black')].iloc[0]

yerr_neg_non_black_nprob = samp_df.Estimate.loc[(samp_df['Sample']=='n') & (samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black')].iloc[0] - lb_non_black_nprob
yerr_pos_non_black_nprob = ub_non_black_nprob - samp_df.Estimate.loc[(samp_df['Sample']=='n') & (samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black')].iloc[0]

yerr_neg_non_black_nlin = samp_df.Estimate.loc[(samp_df['Sample']=='n') & (samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black')].iloc[0] - lb_non_black_nlin
yerr_pos_non_black_nlin = ub_non_black_nlin - samp_df.Estimate.loc[(samp_df['Sample']=='n') & (samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black')].iloc[0]

yerr_neg_black_nprob = samp_df.Estimate.loc[(samp_df['Sample']=='n') & (samp_df['Linear']=='p') & (samp_df['Black']=='Black')].iloc[0] - lb_black_nprob
yerr_pos_black_nprob = ub_black_nprob - samp_df.Estimate.loc[(samp_df['Sample']=='n') & (samp_df['Linear']=='p') & (samp_df['Black']=='Black')].iloc[0]

yerr_neg_black_nlin = samp_df.Estimate.loc[(samp_df['Sample']=='n') & (samp_df['Linear']=='l') & (samp_df['Black']=='Black')].iloc[0] - lb_black_nlin
yerr_pos_black_nlin = ub_black_nlin - samp_df.Estimate.loc[(samp_df['Sample']=='n') & (samp_df['Linear']=='l') & (samp_df['Black']=='Black')].iloc[0]

samp_df['yerr_neg'] = [yerr_neg_non_black_eprob, yerr_neg_non_black_elin, yerr_neg_black_eprob, yerr_neg_black_elin, yerr_neg_non_black_nprob, yerr_neg_non_black_nlin, yerr_neg_black_nprob, yerr_neg_black_nlin]
samp_df['yerr_pos'] = [yerr_pos_non_black_eprob, yerr_pos_non_black_elin, yerr_pos_black_eprob, yerr_pos_black_elin, yerr_pos_non_black_nprob, yerr_pos_non_black_nlin, yerr_pos_black_nprob, yerr_pos_black_nlin]

# plot bars with new bootstrapped standard errors
idx1 = np.array([0,0.25,0.5,1,1.25,1.5])
idx2= np.array([0.25,1.25])
bwidth = 0.2
legend_elements = [Patch(facecolor = "white", edgecolor='k', label = 'Probabilistic'), Patch(facecolor = "white", edgecolor='k', hatch = "//", label = 'Linear')]
ax = plt.subplot(111)
arpnbe = ax.bar(idx1[0]-bwidth, samp_df.Estimate.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'e')], yerr = (samp_df.yerr_neg.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'e')], samp_df.yerr_pos.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'e')]) ,align = 'center', width = (bwidth-0.01), color = 'purple', alpha = 0.4, label = 'Probabilistic Non-Black', capsize = 15)
arlnbe = ax.bar(idx1[0],samp_df.Estimate.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'e')] , yerr = (samp_df.yerr_neg.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'e')], samp_df.yerr_pos.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'e')]) ,align = 'center', width = (bwidth-0.01), color = 'purple', alpha = 0.4, label = 'Linear Non-Black', capsize = 15, hatch = "//")
arpbe = ax.bar((idx1[2])-bwidth, samp_df.Estimate.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'e')], yerr = (samp_df.yerr_neg.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'e')], samp_df.yerr_pos.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'e')]) ,align = 'center', width = (bwidth-0.01), color = 'purple', alpha = 1, label = 'Probabilistic Black', capsize = 15)
arlbe = ax.bar((idx1[2]),samp_df.Estimate.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'e')] , yerr = (samp_df.yerr_neg.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'e')], samp_df.yerr_pos.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'e')]) ,align = 'center', width = (bwidth-0.01), color = 'purple', alpha = 1, label = 'Linear Black', capsize = 15, hatch = "//", edgecolor = 'white')
arpnbne = ax.bar((idx1[3])-bwidth, samp_df.Estimate.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'n')], yerr = (samp_df.yerr_neg.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'n')], samp_df.yerr_pos.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'n')]) ,align = 'center', width = (bwidth-0.01), color = 'purple', alpha = 0.4, label = 'Probabilistic Non-Black', capsize = 15)
arlnbne = ax.bar((idx1[3]),samp_df.Estimate.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'n')] , yerr = (samp_df.yerr_neg.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'n')], samp_df.yerr_pos.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Non-Black') & (samp_df['Sample'] == 'n')]) ,align = 'center', width = (bwidth-0.01), color = 'purple', alpha = 0.4, label = 'Linear Non-Black', capsize = 15, hatch = "//")
arpbne = ax.bar((idx1[5])-bwidth, samp_df.Estimate.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'n')], yerr = (samp_df.yerr_neg.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'n')], samp_df.yerr_pos.loc[(samp_df['Linear']=='p') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'n')]) ,align = 'center', width = (bwidth-0.01), color = 'purple', alpha = 1, label = 'Probabilistic Black', capsize = 15)
arlbne = ax.bar((idx1[5]),samp_df.Estimate.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'n')] , yerr = (samp_df.yerr_neg.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'n')], samp_df.yerr_pos.loc[(samp_df['Linear']=='l') & (samp_df['Black']=='Black') & (samp_df['Sample'] == 'n')]) ,align = 'center', width = (bwidth-0.01), color = 'purple', alpha = 1, label = 'Linear Black', capsize = 15, hatch = "//", edgecolor = 'white')
ax.set_xticks(idx1- bwidth/2)
#ax.set_xticks(idx2 - bwidth/2)
#ax.set_xticks(idx1 - bwidth/2, minor = True)
ax.set_xticklabels(['Non-Black','\n\nEITC','Black','Non-Black','\n\nNon-EITC','Black'])
#ax.set_xlabel('EITC                                    Non-EITC')
ax.set_xlabel('')
ax.set_ylabel('Audit Rate (%)')
labelbar(ax, arpnbe)
labelbar(ax, arlnbe)
labelbar(ax, arpbe)
labelbar(ax, arlbe)
labelbar(ax, arpnbne)
labelbar(ax, arlnbne)
labelbar(ax, arpbne)
labelbar(ax, arlbne)
ax.legend(handles = legend_elements)
plt.savefig('/REDACTED/subpop_audit_rate_estimators_bootstrap.png')
plt.close()
